knitr::opts_chunk$set(echo = TRUE)
library(usmap)
library(ggplot2)
library(maps)
library(ggsn)
## Loading required package: grid
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ tibble  3.1.4     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   2.0.1     ✓ forcats 0.5.1
## ✓ purrr   0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks plotly::filter(), stats::filter()
## x dplyr::lag()    masks stats::lag()
## x purrr::map()    masks maps::map()

Introduction

This provides insight and analysis into Beer and Brewery data across the US. Data was provided by Budweiser Executives for strategic decision making.

#library(tidyverse)
beers = read_csv(url("https://raw.githubusercontent.com/BivinSadler/MSDS_6306_Doing-Data-Science/Master/Unit%208%20and%209%20Case%20Study%201/Beers.csv"))
## Rows: 2410 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Name, Style
## dbl (5): Beer_ID, ABV, IBU, Brewery_id, Ounces
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
breweries = read_csv(url("https://raw.githubusercontent.com/BivinSadler/MSDS_6306_Doing-Data-Science/Master/Unit%208%20and%209%20Case%20Study%201/Breweries.csv"))
## Rows: 558 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Name, City, State
## dbl (1): Brew_ID
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
summary(beers)
##      Name              Beer_ID            ABV               IBU        
##  Length:2410        Min.   :   1.0   Min.   :0.00100   Min.   :  4.00  
##  Class :character   1st Qu.: 808.2   1st Qu.:0.05000   1st Qu.: 21.00  
##  Mode  :character   Median :1453.5   Median :0.05600   Median : 35.00  
##                     Mean   :1431.1   Mean   :0.05977   Mean   : 42.71  
##                     3rd Qu.:2075.8   3rd Qu.:0.06700   3rd Qu.: 64.00  
##                     Max.   :2692.0   Max.   :0.12800   Max.   :138.00  
##                                      NA's   :62        NA's   :1005    
##    Brewery_id       Style               Ounces     
##  Min.   :  1.0   Length:2410        Min.   : 8.40  
##  1st Qu.: 94.0   Class :character   1st Qu.:12.00  
##  Median :206.0   Mode  :character   Median :12.00  
##  Mean   :232.7                      Mean   :13.59  
##  3rd Qu.:367.0                      3rd Qu.:16.00  
##  Max.   :558.0                      Max.   :32.00  
## 
summary(breweries)
##     Brew_ID          Name               City              State          
##  Min.   :  1.0   Length:558         Length:558         Length:558        
##  1st Qu.:140.2   Class :character   Class :character   Class :character  
##  Median :279.5   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :279.5                                                           
##  3rd Qu.:418.8                                                           
##  Max.   :558.0
head(beers)
## # A tibble: 6 × 7
##   Name                Beer_ID   ABV   IBU Brewery_id Style                Ounces
##   <chr>                 <dbl> <dbl> <dbl>      <dbl> <chr>                 <dbl>
## 1 Pub Beer               1436 0.05     NA        409 American Pale Lager      12
## 2 Devil's Cup            2265 0.066    NA        178 American Pale Ale (…     12
## 3 Rise of the Phoenix    2264 0.071    NA        178 American IPA             12
## 4 Sinister               2263 0.09     NA        178 American Double / I…     12
## 5 Sex and Candy          2262 0.075    NA        178 American IPA             12
## 6 Black Exodus           2261 0.077    NA        178 Oatmeal Stout            12
head(breweries)
## # A tibble: 6 × 4
##   Brew_ID Name                      City          State
##     <dbl> <chr>                     <chr>         <chr>
## 1       1 NorthGate Brewing         Minneapolis   MN   
## 2       2 Against the Grain Brewery Louisville    KY   
## 3       3 Jack's Abby Craft Lagers  Framingham    MA   
## 4       4 Mike Hess Brewing Company San Diego     CA   
## 5       5 Fort Point Beer Company   San Francisco CA   
## 6       6 COAST Brewing Company     Charleston    SC
  1. How many breweries are present in each state? # Breweries by State
#state_data -- Breweries Filtered by State - joined with FIPS code by State
#mutate - Join FIPS data with Breweries data
#trimws - remove Leading/Trailing Whitespace from column(State)
#fips = State by Abbreviation - ie. TX

state_data = breweries %>% count(State) %>% mutate(fips = fips(trimws(State)))

us_map <- usmap::us_map() # used to add map scale

usmap::plot_usmap(data = state_data, values = "n", labels = T)+
  labs(fill = 'Breweries by State') + 
  scale_fill_gradientn(colours=rev(topo.colors(50)),na.value="grey90",
                       guide = guide_colourbar(barwidth = 25, barheight = 0.6,
#put legend title on top of legend
title.position = "top")) +

  # put legend at the bottom, adjust legend title and text font sizes
  theme(legend.position = "bottom",
        legend.title=element_text(size=12), 
        legend.text=element_text(size=10))

  1. Merge beer data with the breweries data. Print the first 6 observations and the last six observations to check the merged file. (RMD only, this does not need to be included in the presentation or the deck.)
#Merge beer and breweries data - trim whitespace - add in FIPS codes
full_data = merge(beers,breweries,by.y="Brew_ID",by.x="Brewery_id") %>%
  mutate(fips = fips(trimws(State)))

head(full_data)
##   Brewery_id        Name.x Beer_ID   ABV IBU
## 1          1  Get Together    2692 0.045  50
## 2          1 Maggie's Leap    2691 0.049  26
## 3          1    Wall's End    2690 0.048  19
## 4          1       Pumpion    2689 0.060  38
## 5          1    Stronghold    2688 0.060  25
## 6          1   Parapet ESB    2687 0.056  47
##                                 Style Ounces            Name.y        City
## 1                        American IPA     16 NorthGate Brewing Minneapolis
## 2                  Milk / Sweet Stout     16 NorthGate Brewing Minneapolis
## 3                   English Brown Ale     16 NorthGate Brewing Minneapolis
## 4                         Pumpkin Ale     16 NorthGate Brewing Minneapolis
## 5                     American Porter     16 NorthGate Brewing Minneapolis
## 6 Extra Special / Strong Bitter (ESB)     16 NorthGate Brewing Minneapolis
##   State fips
## 1    MN   27
## 2    MN   27
## 3    MN   27
## 4    MN   27
## 5    MN   27
## 6    MN   27
tail(full_data)
##      Brewery_id                    Name.x Beer_ID   ABV IBU
## 2405        556             Pilsner Ukiah      98 0.055  NA
## 2406        557  Heinnieweisse Weissebier      52 0.049  NA
## 2407        557           Snapperhead IPA      51 0.068  NA
## 2408        557         Moo Thunder Stout      50 0.049  NA
## 2409        557         Porkslap Pale Ale      49 0.043  NA
## 2410        558 Urban Wilderness Pale Ale      30 0.049  NA
##                        Style Ounces                        Name.y          City
## 2405         German Pilsener     12         Ukiah Brewing Company         Ukiah
## 2406              Hefeweizen     12       Butternuts Beer and Ale Garrattsville
## 2407            American IPA     12       Butternuts Beer and Ale Garrattsville
## 2408      Milk / Sweet Stout     12       Butternuts Beer and Ale Garrattsville
## 2409 American Pale Ale (APA)     12       Butternuts Beer and Ale Garrattsville
## 2410        English Pale Ale     12 Sleeping Lady Brewing Company     Anchorage
##      State fips
## 2405    CA   06
## 2406    NY   36
## 2407    NY   36
## 2408    NY   36
## 2409    NY   36
## 2410    AK   02
full_data[sample(1:nrow(full_data),6),]
##      Brewery_id                  Name.x Beer_ID   ABV IBU
## 2240        491   Old Elephant Foot IPA     544 0.070  80
## 441          64          Boohai Red Ale     940    NA  NA
## 1078        177 Pride of Texas Pale Ale    2229 0.058  60
## 1427        265       Euphoria Pale Ale      69 0.061  NA
## 1897        381          1327 Pod's ESB     886 0.056  37
## 15            2             Rico Sauvin    2678 0.076  68
##                                    Style Ounces                         Name.y
## 2240                        American IPA     16      Tampa Bay Brewing Company
## 441             American Amber / Red Ale     12 Crazy Mountain Brewing Company
## 1078             American Pale Ale (APA)     12    Rahr & Sons Brewing Company
## 1427             American Pale Ale (APA)     12            Ska Brewing Company
## 1897 Extra Special / Strong Bitter (ESB)     12     Blue Blood Brewing Company
## 15        American Double / Imperial IPA     16      Against the Grain Brewery
##            City State fips
## 2240      Tampa    FL   12
## 441     Edwards    CO   08
## 1078 Fort Worth    TX   48
## 1427    Durango    CO   08
## 1897    Lincoln    NE   31
## 15   Louisville    KY   21
  1. Address the missing values in each column.

Two Datasets, “Beers.csv” and “Breweries.csv” were joined to create a Full Dataset for analysis and insight

Only ABV and IBU are missing Data Values

Important to note: IBU is missing values in around 42% of the 2,410 total rows of data

**Predictions for IBU specifically, may be skewed

#use colSum to sum up the NA's in each column
full_dataNAs <- as.data.frame(colSums(is.na(full_data)))
names(full_dataNAs) <- c("Missing Values (NAs)")
full_dataNAs
##            Missing Values (NAs)
## Brewery_id                    0
## Name.x                        0
## Beer_ID                       0
## ABV                          62
## IBU                        1005
## Style                         5
## Ounces                        0
## Name.y                        0
## City                          0
## State                         0
## fips                          0
summary(full_data)
##    Brewery_id       Name.x             Beer_ID            ABV         
##  Min.   :  1.0   Length:2410        Min.   :   1.0   Min.   :0.00100  
##  1st Qu.: 94.0   Class :character   1st Qu.: 808.2   1st Qu.:0.05000  
##  Median :206.0   Mode  :character   Median :1453.5   Median :0.05600  
##  Mean   :232.7                      Mean   :1431.1   Mean   :0.05977  
##  3rd Qu.:367.0                      3rd Qu.:2075.8   3rd Qu.:0.06700  
##  Max.   :558.0                      Max.   :2692.0   Max.   :0.12800  
##                                                      NA's   :62       
##       IBU            Style               Ounces         Name.y         
##  Min.   :  4.00   Length:2410        Min.   : 8.40   Length:2410       
##  1st Qu.: 21.00   Class :character   1st Qu.:12.00   Class :character  
##  Median : 35.00   Mode  :character   Median :12.00   Mode  :character  
##  Mean   : 42.71                      Mean   :13.59                     
##  3rd Qu.: 64.00                      3rd Qu.:16.00                     
##  Max.   :138.00                      Max.   :32.00                     
##  NA's   :1005                                                          
##      City              State               fips          
##  Length:2410        Length:2410        Length:2410       
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
## 
1005/2410 #Calculate percentage missing data from IBU Column
## [1] 0.4170124
  1. Compute the median alcohol content and international bitterness unit for each state. Plot a bar chart to compare. # Alcohol Content, Bitterness by State - BarChart
#Preliminary Steps

#Import Beer and Brewery Data Sets
Beer = read.csv("https://raw.githubusercontent.com/BivinSadler/MSDS_6306_Doing-Data-Science/Master/Unit%208%20and%209%20Case%20Study%201/Beers.csv", header = TRUE)
Breweries = read.csv("https://raw.githubusercontent.com/BivinSadler/MSDS_6306_Doing-Data-Science/Master/Unit%208%20and%209%20Case%20Study%201/Breweries.csv", header = TRUE)

#Updating the name of the first column of the breweries dataset so there is a matching column between the two datasets
names(Breweries)[1] <- 'Brewery_id'

#Merging the two sets together by the Brewery ID column
BB = merge(Beer,Breweries, by="Brewery_id", all = TRUE)
#Updating the mame of the 8th column so that it is Brewery
names(BB)[8] <- 'Brewery'
#Omitting all N/A values from the ABV and IBU columns
x = na.omit(BB[,c(4,10)])
y = na.omit(BB[,c(5,10)])

Alcohol by Volume Comparison by State

#Finding the median ABV of each state
medians = aggregate(x=x$ABV, by = list(x$State), FUN = median)
#Finding the median IBU of each state
mediansIBU = aggregate(x=y$IBU, by = list(y$State), FUN = median)
#Updating column names of the median ABV dataset
names(medians)[1]<- 'State'
names(medians)[2]<- 'ABV'
#Updating column names of the median IBU dataset
names(mediansIBU)[1]<- 'State'
names(mediansIBU)[2]<- 'IBU'

#Plotting the median ABV of each state
p = medians %>%
  arrange(desc(ABV, State)) %>%
ggplot(mapping = aes(x=State, y=ABV)) + geom_bar(stat = 'identity', fill = topo.colors(51), width = .5,color = 'black', show.legend = TRUE) + labs(y= "Median ABV", title = "Median ABV (Beers) by State") 

ggplotly(p) #Add interactive data labels

Bitterness Comparison by State

#Plotting the median IBU of each state
p = mediansIBU %>%
    arrange(desc(IBU, State)) %>%
ggplot(mapping = aes(x=State, y=IBU)) + geom_bar(stat = 'identity', fill = topo.colors(50), width = .5,color = 'black') + labs(y= "Median IBU", title = "Median IBU of Beers by State") 

ggplotly(p) #Add interacitve data labels

5a. Which state has the maximum alcoholic (ABV) beer? #Max ABV = CO

#Finding the highest ABV of all the states
MaxABV = aggregate(x$ABV, by = list(x$State),max)
#Finding the highest IBU of all the states
MaxIBU = aggregate(y$IBU, by = list(y$State),max)
#Updating column names of the max IBU dataset
names(MaxIBU)[1]<- 'State'
names(MaxIBU)[2]<- 'IBU'
#Updating column names of the max ABV dataset
names(MaxABV)[1]<- 'State'
names(MaxABV)[2]<- 'ABV'
#Order the MAX ABV and IBU datasets in descending order
MaxABV = BB[order(BB$ABV, decreasing = TRUE),]
MaxIBU = BB[order(BB$IBU, decreasing = TRUE),]

#Plotting the Max ABV of each state
p = MaxABV %>%
ggplot(mapping = aes(x=State, y=ABV)) + geom_bar(stat = "identity", fill = "darkslateblue") + labs(y="ABV", title = "Alcohol by Volume per State") 

ggplotly(p)
## Warning: Removed 62 rows containing missing values (position_stack).

5b. Which state has the most bitter (IBU) beer? #Most Bitter Beer = OR

#Plotting the Max IBU of each state
p = MaxIBU %>%
ggplot(mapping = aes(x=State, y=IBU)) + geom_bar(stat = "identity", fill = "chartreuse3", width = .5,color = 'darkslateblue') + labs(y="IBU", title = "Highest Bitterness per State") 
ggplotly(p)
## Warning: Removed 1005 rows containing missing values (position_stack).
  1. Comment on the summary statistics and distribution of the ABV variable.
#Budweiser

Beers = read_csv(url("https://raw.githubusercontent.com/BivinSadler/MSDS_6306_Doing-Data-Science/Master/Unit%208%20and%209%20Case%20Study%201/Beers.csv"))
## Rows: 2410 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Name, Style
## dbl (5): Beer_ID, ABV, IBU, Brewery_id, Ounces
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Breweries = read_csv(url("https://raw.githubusercontent.com/BivinSadler/MSDS_6306_Doing-Data-Science/Master/Unit%208%20and%209%20Case%20Study%201/Breweries.csv"))
## Rows: 558 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Name, City, State
## dbl (1): Brew_ID
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#merge the data before performing EDA
names(Breweries)[1] <- 'Brewery_id'
Breweries
## # A tibble: 558 × 4
##    Brewery_id Name                         City          State
##         <dbl> <chr>                        <chr>         <chr>
##  1          1 NorthGate Brewing            Minneapolis   MN   
##  2          2 Against the Grain Brewery    Louisville    KY   
##  3          3 Jack's Abby Craft Lagers     Framingham    MA   
##  4          4 Mike Hess Brewing Company    San Diego     CA   
##  5          5 Fort Point Beer Company      San Francisco CA   
##  6          6 COAST Brewing Company        Charleston    SC   
##  7          7 Great Divide Brewing Company Denver        CO   
##  8          8 Tapistry Brewing             Bridgman      MI   
##  9          9 Big Lake Brewing             Holland       MI   
## 10         10 The Mitten Brewing Company   Grand Rapids  MI   
## # … with 548 more rows
Beer_Merge <- merge(x=Beers,y=Breweries, by = "Brewery_id", all.x = TRUE)

lookup = data.frame(abb = state.abb, State = state.name) #makes a data frame with State name and abbreviation.

summary(Beer_Merge$ABV) #see summary statistics of mean, median, mode, and quartiles
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## 0.00100 0.05000 0.05600 0.05977 0.06700 0.12800      62
#plot the distribution of ABV
Beer_Merge %>%
  ggplot(aes(x = ABV)) + geom_density(binwidth = .5, colour="black",fill="chartreuse3") +
  geom_vline(aes(xintercept=mean(ABV, na.rm=T)),
             color = "red", linetype = "dashed", size = 1) +
  ggtitle("Distribution Density of ABV")
## Warning: Ignoring unknown parameters: binwidth
## Warning: Removed 62 rows containing non-finite values (stat_density).

  1. Is there an apparent relationship between the bitterness of the beer and its alcoholic content? Draw a scatter plot. Make your best judgment of a relationship and EXPLAIN your answer.

There appears to a positive correlation between ABV and IBU however, IBU values may be skewed due to missing IBU data - 42% of total missing

Beer_Merge %>%
  ggplot(aes(x = ABV, y = IBU, fill = "sample")) +
  geom_point() + geom_smooth(method =  lm) +
  ggtitle("Relationship Between IBU and ABV")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1005 rows containing non-finite values (stat_smooth).
## Warning: Removed 1005 rows containing missing values (geom_point).

correlation_test = cor.test(Beer_Merge$ABV,Beer_Merge$IBU) #run correlation to determine p value and r-sqaured
correlation_test #showcase statistics
## 
##  Pearson's product-moment correlation
## 
## data:  Beer_Merge$ABV and Beer_Merge$IBU
## t = 33.863, df = 1403, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.6407982 0.6984238
## sample estimates:
##       cor 
## 0.6706215
#The scatterplot shows a positive correlation between level of bitterness and alcohol by volume indicating that the higher the bitterness the more alcohol that tends to be contatined within

#Even with a sample of 1005 beers out of 2410 observations in total of IBU, we can conclude that a p value of 2.2e-16 we can say,
#we should reject the null hypothesis given an intercept of 0, which is further proved by the confidence interval
#being .64 to .69 at alpha of .05 or 95%.
#There appears to be a strong relationship with a r-squared correlation of .6706 squared = .45.
#It's a fair to state that when the ABV or IBU is increased the other shall also increase.

##Bonus Insights

Top Colorado Beer Styles

#Top 2 States _____#CO | #CA - Top Beer Styles for Top 2 States 

COStyle = full_data %>% filter(fips == "08") %>% select(State,Style) %>% group_by(State) %>% 
  count(Style) %>% slice_max(n,n=5) %>% mutate(fips = fips(trimws(State)))

#Colorado Top Beer Styles BarChart
p = COStyle %>% 
  arrange(desc(COStyle$Style)) %>% 
  mutate(Style = factor(Style)) %>% 
  ggplot(aes(y=Style,x=COStyle$n, fill = COStyle$n)) + geom_bar(stat='identity', show.legend = TRUE, width = .8) +
  labs(fill = 'CO Beer Styles') +
  ggtitle("Top Colorado Beer Styles")

ggplotly(p)

Top California Beer Styles

CAStyle = full_data %>% filter(fips == "06") %>% select(State,Style) %>% group_by(State) %>% 
  count(Style) %>% slice_max(n,n=5) %>% mutate(fips = fips(trimws(State)))


#California Top Beer Styles BarChart
p = CAStyle %>% 
  arrange(desc(CAStyle$Style)) %>% 
  mutate(Style = factor(Style)) %>% 
  ggplot(mapping = aes(y=Style,x=CAStyle$n, fill = CAStyle$n)) + geom_bar(stat='identity', show.legend = TRUE, width = .8) +
  labs(fill = 'CA Beer Styles') +
ggtitle("Top California Beer Styles")
  
ggplotly(p)

Nearest Neighbor (KNN) Prediction for Ale

#Filter data for IPA and Ale 
Aledata = full_data %>% mutate(IPA = grepl('IPA',full_data$Style), Ale  = grepl('Ale',full_data$Style)) %>% filter(IPA | Ale) %>% select(Ale,IPA,ABV,IBU, Style)

Aledata = Aledata %>% mutate(Alcohol_by_Volume = scale(ABV),biterness_scale = scale(IBU)) #Get Ranges of ABV and IBU data

summary(Aledata)
##     Ale             IPA               ABV               IBU        
##  Mode :logical   Mode :logical   Min.   :0.02700   Min.   :  4.00  
##  FALSE:558       FALSE:963       1st Qu.:0.05100   1st Qu.: 27.00  
##  TRUE :976       TRUE :571       Median :0.06000   Median : 45.00  
##                                  Mean   :0.06131   Mean   : 49.95  
##                                  3rd Qu.:0.07000   3rd Qu.: 70.00  
##                                  Max.   :0.09900   Max.   :138.00  
##                                  NA's   :42        NA's   :590     
##     Style           Alcohol_by_Volume.V1 biterness_scale.V1
##  Length:1534        Min.   :-2.62975     Min.   :-1.7480   
##  Class :character   1st Qu.:-0.79021     1st Qu.:-0.8731   
##  Mode  :character   Median :-0.10038     Median :-0.1884   
##                     Mean   : 0.00000     Mean   : 0.0000   
##                     3rd Qu.: 0.66609     3rd Qu.: 0.7625   
##                     Max.   : 2.88887     Max.   : 3.3492   
##                     NA's   :42           NA's   :590
library(tidyverse)
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift
library(class)

train = Aledata %>% select(Alcohol_by_Volume,biterness_scale,Ale,ABV,IBU) %>% na.omit()

dim(train) #944 rows
## [1] 944   5
head(train)
##   Alcohol_by_Volume biterness_scale   Ale   ABV IBU
## 1        -1.2500921     0.001772975 FALSE 0.045  50
## 2        -1.0201500    -1.177416789  TRUE 0.048  19
## 3        -0.1003814    -0.454687579  TRUE 0.060  38
## 4         1.4325663     0.686463806 FALSE 0.080  68
## 5        -1.4800343    -0.302534061  TRUE 0.042  42
## 6         0.3595029    -1.101340030  TRUE 0.066  21
trainIndex = sample(seq(1,944,1),100)

train2 = train[trainIndex,]

test2 = train[-trainIndex,]

AleType = test2$Ale

#IBU and ABV
classifications = knn(train2[,c(4,5)],test2[,c(4,5)],cl = train2$Ale,k = 28)
CM = table(classifications,AleType)
CM2 = confusionMatrix(CM)
CM2
## Confusion Matrix and Statistics
## 
##                AleType
## classifications FALSE TRUE
##           FALSE   301   96
##           TRUE     40  407
##                                          
##                Accuracy : 0.8389         
##                  95% CI : (0.8123, 0.863)
##     No Information Rate : 0.596          
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.674          
##                                          
##  Mcnemar's Test P-Value : 2.403e-06      
##                                          
##             Sensitivity : 0.8827         
##             Specificity : 0.8091         
##          Pos Pred Value : 0.7582         
##          Neg Pred Value : 0.9105         
##              Prevalence : 0.4040         
##          Detection Rate : 0.3566         
##    Detection Prevalence : 0.4704         
##       Balanced Accuracy : 0.8459         
##                                          
##        'Positive' Class : FALSE          
## 
CM2$overall[1] # accuracy
##  Accuracy 
## 0.8388626

Ale Classification by Alcohol Content and Bitterness

#Testing for Style of Ale based upon Alcohol Content and Bitterness